This RMD describes the analysis of the 23 feature NMR dataset from Jingwei (Andrew Patterson) comparing lean/obese Caucasian and Chinese individuals with n=5 stool samples analysed for each of the four groups.

These 23 features were the most representative fecal metabolites with a good signal.

Setup

Libraries

library(readxl)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)
library(MicrobeR)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] MicrobeR_0.3.2 tibble_3.0.1   tidyr_1.0.2    dplyr_0.8.5    ggplot2_3.3.0 
## [6] readxl_1.3.1  
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-144        ggtree_2.0.4        phyloseq_1.30.0    
##  [4] bit64_0.9-7         httr_1.4.1          tools_3.6.3        
##  [7] R6_2.4.1            vegan_2.5-6         DT_0.13            
## [10] rtk_0.2.5.8         DBI_1.1.0           lazyeval_0.2.2     
## [13] BiocGenerics_0.32.0 mgcv_1.8-31         colorspace_1.4-1   
## [16] permute_0.9-5       ade4_1.7-15         NADA_1.6-1.1       
## [19] withr_2.2.0         philr_1.12.0        tidyselect_1.0.0   
## [22] phangorn_2.5.5      bit_1.1-15.2        compiler_3.6.3     
## [25] Biobase_2.46.0      plotly_4.9.2.1      scales_1.1.0       
## [28] quadprog_1.5-8      stringr_1.4.0       digest_0.6.25      
## [31] rmarkdown_2.1       XVector_0.26.0      pkgconfig_2.0.3    
## [34] htmltools_0.5.0     htmlwidgets_1.5.1   rlang_0.4.5        
## [37] RSQLite_2.2.0       zCompositions_1.3.4 jsonlite_1.6.1     
## [40] magrittr_1.5        biomformat_1.14.0   Matrix_1.2-18      
## [43] Rcpp_1.0.4          munsell_0.5.0       S4Vectors_0.24.4   
## [46] Rhdf5lib_1.8.0      DECIPHER_2.14.0     ape_5.3            
## [49] lifecycle_0.2.0     stringi_1.4.6       yaml_2.2.1         
## [52] MASS_7.3-51.5       zlibbioc_1.32.0     rhdf5_2.30.1       
## [55] plyr_1.8.6          grid_3.6.3          blob_1.2.1         
## [58] parallel_3.6.3      crayon_1.3.4        lattice_0.20-38    
## [61] Biostrings_2.54.0   splines_3.6.3       multtest_2.42.0    
## [64] knitr_1.28          pillar_1.4.3        igraph_1.2.5       
## [67] reshape2_1.4.4      codetools_0.2-16    stats4_3.6.3       
## [70] fastmatch_1.1-0     picante_1.8.1       glue_1.4.0         
## [73] evaluate_0.14       data.table_1.12.8   BiocManager_1.30.10
## [76] vctrs_0.2.4         treeio_1.10.0       foreach_1.5.0      
## [79] cellranger_1.1.0    gtable_0.3.0        purrr_0.3.4        
## [82] assertthat_0.2.1    xfun_0.13           tidytree_0.3.3     
## [85] viridisLite_0.3.0   survival_3.1-8      truncnorm_1.0-8    
## [88] iterators_1.0.12    rvcheck_0.1.8       memoise_1.1.0      
## [91] IRanges_2.20.2      cluster_2.1.0       ellipsis_0.3.0

Theme

theme_pub<- function () { 
  theme_bw(base_size=10, base_family="Helvetica") +
  theme(axis.text = element_text(size=8, color = "black"),
        axis.title = element_text(size=10, color="black"), 
        legend.text = element_text(size=8, color = "black"), 
        legend.title = element_text(size=10, color = "black"),
        panel.border = element_rect(color="black", fill=NA))
}

Load dataset

nmr<-read_excel("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/metabo_human_nmr_selected.xlsx")

Heatmap of 23 features

# nmr dataset to long form
nmr_long <- nmr %>% pivot_longer(
  cols=Acetate:Xanthine,
  names_to="Metabolite",
  values_to="Value")

# scale values
nmr_long <- nmr_long %>%
  group_by(Metabolite) %>%
  mutate(Zscore = Value %>% scale (center = TRUE, scale = TRUE)) # calculate z-score

# distribution of values
ggplot(nmr_long, aes(x=Zscore)) + geom_histogram() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# test significance
results<- nmr_long %>%
  group_by(Metabolite) %>%
  do(
    broom::glance(t.test(Zscore~Ethnicity, data=., paired=F))
  ) %>%
  ungroup() %>%
  select(Metabolite, p.value) %>%
  mutate(p.adj = p.adjust(p.value, method="fdr")) %>%
  mutate(Significant=ifelse(p.value<0.05, "*",""))
Nice.Table(results)
# define row order (metabolites)
nmr_wide <- nmr_long %>% 
  pivot_wider(id_cols = "Metabolite", names_from="SAMPLE_ID", values_from="Zscore") %>% 
  column_to_rownames("Metabolite")
roworder <- hclust(dist(nmr_wide), method = "average")
roworder <- roworder$labels[roworder$order]

# define column order (subject)
nmr_wide <- nmr_long %>% 
  pivot_wider(id_cols = "SAMPLE_ID", names_from="Metabolite", values_from="Zscore") %>% 
  column_to_rownames("SAMPLE_ID")
colorder <- hclust(dist(nmr_wide), method = "average")
colorder <- colorder$labels[colorder$order]

# heatmap
nmr_long %>%
  mutate(Group=factor(Group, levels=c("Lean CA","Obese CA","Lean CH","Obese CH"))) %>%
  ggplot(aes(y=factor(Metabolite, levels=roworder), x=factor(`SAMPLE_ID`, levels=colorder), fill = Zscore)) +
  geom_tile() + 
  scale_fill_gradient2(low="cornflowerblue", high="indianred") +
  #scale_fill_distiller(type ="seq", palette = "RdBu", na.value = "gray90") + 
  coord_cartesian(expand=F) +
  ylab("") +
  xlab("") +
  facet_grid(~Ethnicity, margins=F, drop=T, scales="free", space="free") +
  theme_bw() +
  theme(axis.text = element_text(size = 8)) 

#ggsave("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript/figures/metab_human_heatmap_repfeats.pdf", height=4, width=5, useDingbats=F)

Heatmap of significant features

nmr_long <- nmr_long %>%
  filter(Metabolite %in% (results %>% filter(p.value<0.05))$Metabolite)

# define row order (metabolites)
nmr_wide <- nmr_long %>% 
  pivot_wider(id_cols = "Metabolite", names_from="SAMPLE_ID", values_from="Zscore") %>% 
  column_to_rownames("Metabolite")
roworder <- hclust(dist(nmr_wide), method = "average")
roworder <- roworder$labels[roworder$order]

# define column order (subject)
nmr_wide <- nmr_long %>% 
  pivot_wider(id_cols = "SAMPLE_ID", names_from="Metabolite", values_from="Zscore") %>% 
  column_to_rownames("SAMPLE_ID")
colorder <- hclust(dist(nmr_wide), method = "average")
colorder <- colorder$labels[colorder$order]

# heatmap
nmr_long %>%
  mutate(Group=factor(Group, levels=c("Lean CA","Obese CA","Lean CH","Obese CH"))) %>%
  ggplot(aes(y=factor(Metabolite, levels=roworder), x=factor(`SAMPLE_ID`, levels=colorder), fill = Zscore)) +
  geom_tile() + 
  scale_fill_gradient2(low="cornflowerblue", high="indianred") +
  #scale_fill_distiller(type ="seq", palette = "RdBu", na.value = "gray90") + 
  theme(axis.text = element_text(size = 8), axis.ticks = element_blank()) + 
  theme_pub() +
  coord_cartesian(expand=F) +
  ylab("Metabolite") +
  xlab("Subject") +
  labs(fill="Z-score") +
  facet_grid(~Ethnicity, margins=F, drop=T, scales="free", space="free")

#ggsave("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript/figures/human_metabolomics/metab_human_heatmap_sigfeats.pdf", height=2.2, width=5, useDingbats=F)